from scrapy import log
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from wuliu.items import CrawlersItem
from pyquery import PyQuery as pq

class BaiduSpider(CrawlSpider):
    """Spider that scrapes DMOZ Python Books/Resources listing pages.

    NOTE(review): overriding ``parse`` on a ``CrawlSpider`` disables the
    ``rules``-based link extraction machinery. No ``rules`` are defined
    here, so only the two ``start_urls`` are ever fetched. If link
    following is wanted, rename this callback (e.g. ``parse_item``) and
    add a ``rules`` tuple using ``SgmlLinkExtractor``.
    """

    name = 'baidu'
    allowed_domains = ['dmoz.org']
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
    ]

    def parse(self, response):
        """Extract a ``CrawlersItem`` from one listing page.

        :param response: the downloaded page (HtmlResponse).
        :returns: a populated ``CrawlersItem``.

        Bug fix: the original ``return self`` is invalid -- a Scrapy
        callback must return ``None``, a ``Request``, an item, or an
        iterable of those; returning the spider object breaks the
        engine's result processing. The (previously commented-out)
        extraction is restored and its item returned instead.
        """
        body = response.body_as_unicode()
        log.msg(body)
        # Kept for interface compatibility, but storing per-response state
        # on the spider is unsafe: one spider instance handles many
        # responses concurrently -- TODO confirm nothing else reads this.
        self.pq = pq(body)
        hxs = HtmlXPathSelector(response)
        item = CrawlersItem()
        item['domain_id'] = hxs.select('//input[@id="sid"]/@value').extract()
        item['name'] = hxs.select('//div[@id="name"]').extract()
        item['description'] = hxs.select('//div[@id="description"]').extract()
        return item
